library(data.table)
library(dplyr)
library(tidyr)

dir.project <- './'
dir.data <- paste0(dir.project,'data/')
dir.raw <- paste0(dir.data,'raw/')
dir.generated <- paste0(dir.data,'generated/')

morg <- fread(paste0(dir.generated,'r/morg_data_stacked_90.csv')) %>% filter(wagesample == 1)
constants <- fread(paste0(dir.raw,'constants.csv'))

## First, add small random number to Pareto-imputed wages to make sure that histogram bins don't look strange

set.seed(1)
transformed_wages <- morg$hrwage16_pareto + rnorm(length(morg$hrwage16_pareto))*1e-4


wagebinsM <- quantile(transformed_wages[morg$cat3=='svc'], probs=seq(0,1,0.1))
wagebinsR <- quantile(transformed_wages[morg$cat3=='rt'], probs=seq(0,1,0.1))
wagebinsC <- quantile(transformed_wages[morg$cat3=='abs'], probs=seq(0,1,0.1))

hM <- hist(transformed_wages[morg$cat3=='svc'], breaks = wagebinsM, freq = TRUE)
hR <- hist(transformed_wages[morg$cat3=='rt'], breaks = wagebinsR, freq = TRUE)
hC <- hist(transformed_wages[morg$cat3=='abs'], breaks = wagebinsC, freq = TRUE)

N <- length(transformed_wages)

freqM <- hM$counts/N
freqR <- hR$counts/N
freqC <- hC$counts/N

## Prepare as output

fwrite(as.data.table(freqM),paste0(dir.generated,'r/freq_pareto_10_M.csv'))
fwrite(as.data.table(freqR),paste0(dir.generated,'r/freq_pareto_10_R.csv'))
fwrite(as.data.table(freqC),paste0(dir.generated,'r/freq_pareto_10_C.csv'))
fwrite(as.data.table(wagebinsM),paste0(dir.generated,'r/wagebins_pareto_10_M.csv'))
fwrite(as.data.table(wagebinsR),paste0(dir.generated,'r/wagebins_pareto_10_R.csv'))
fwrite(as.data.table(wagebinsC),paste0(dir.generated,'r/wagebins_pareto_10_C.csv'))

## generate other moments

# log-wage changes by occupation based on Figure 9 in AR (2019)

data.sumstats <- fread(paste0(dir.generated,'r/morg_sumstats_table.csv'))
data.wagechange <- fread(paste0(dir.raw,'AcemogluRestrepo2018/AR2019_wages_by_education.csv'))
data.combined <- data.sumstats %>% left_join(data.wagechange)
data.combined.summary <- data.combined %>% group_by(cat3) %>%
  mutate(weighted_change = school.sh * log_wage_change) %>%
    summarize(average_change = sum(weighted_change)) %>%
    mutate(occ = recode(cat3,`svc`='M',`rt`='R',`abs`='C')) %>% ungroup() %>%
    select(-cat3)

# adjust for the fact that the numbers in the figure by AR have been multiplied by 100, multiply by the relative change in robots and scale to the aggregate economy
K_B_0 <- constants$K_B_0_data[1]
K_B_1 <- constants$K_B_1_data[1]
# factors by which to scale wage and employment response to the aggregate economy, based on Acemoglu & Restrepo (2019)
AR_scale_wage <- constants$AR_scale_wage[1] 
AR_scale_emp <- constants$AR_scale_emp[1]
data.combined.summary$average_change = data.combined.summary$average_change * 0.01 * K_B_1/K_B_0 * AR_scale_wage

# save to disk
data.wage_change.wide <- spread(data.combined.summary,occ,average_change)
fwrite(data.wage_change.wide,paste0(dir.generated,'r/log_wage_change.csv'))

## Employment to population ratio changes based on Figure 8 in AR
dat.empchange <- fread(paste0(dir.raw,'AcemogluRestrepo2018/AR2019_employment_by_occupation_Figure_8.csv'))
dat.empchange.sum <- dat.empchange %>% group_by(occ) %>% summarise(emp_change = sum(employment_change))
# adjust for the fact that the numbers in the figure by AR have been multiplied by 100, multiply by the relative change in robots and scale to the aggregate economy
dat.empchange.sum$emp_change <- dat.empchange.sum$emp_change * 0.01 * K_B_1/K_B_0 * AR_scale_emp

dat.empchange.wide <- spread(dat.empchange.sum,occ,emp_change)
# save to disk
fwrite(dat.empchange.wide,paste0(dir.generated,'r/emp_change.csv'))
